# Read the car data: a semicolon-separated file with a header row and
# decimal commas, then preview the first rows.
podatki <- read.table(
  file = "/cloud/project/Poglavje 6/Naloga 2/Avtomobili.csv",
  header = TRUE,
  sep = ";",
  dec = ","
)

head(podatki)
##   ID  Ccm  Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1  1 1995 146        5800     455 1380      9.9     210       6.5
## 2  2 1969 150        6200     429 1370      8.4     210       6.2
## 3  3 1969 150        6200     444 1300      9.0     210       6.2
## 4  4 1969 150        6200     429 1370      8.4     215       6.2
## 5  5 1969 150        6200     410 1240      8.4     210       6.2
## 6  6 1747 140        6300     445 1270     10.0     205       6.1
##   Poraba_120 Motor Pogon  Cena
## 1        8.3     2     1 33263
## 2        7.8     2     1 37387
## 3        8.0     2     1 25453
## 4        7.8     2     1 35994
## 5        8.0     2     1 19061
## 6        8.2     2     1 20215

Opis spremenljivk:

# Recode the numeric engine (Motor) and drive (Pogon) codes as labelled
# factors, stored in new columns next to the original codes.
podatki$MotorF <- factor(
  podatki$Motor,
  levels = c(1, 2),
  labels = c("dizelski", "bencinski")
)
podatki$PogonF <- factor(
  podatki$Pogon,
  levels = c(1, 2, 3),
  labels = c("spredaj", "zadaj", "4x4")
)
library(pastecs)
## 
## Attaching package: 'pastecs'
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following objects are masked from 'package:dplyr':
## 
##     first, last
# Descriptive statistics (without the basic counts) for columns 2 to 13,
# rounded to two decimals.
round(
  stat.desc(podatki[2:13], basic = FALSE),
  digits = 2
)
##                    Ccm      Km Vrtljajimax Dolžina     Teža Pospešek
## median         1984.00  118.00     5500.00  443.00  1250.00    11.10
## mean           2155.22  133.16     5428.28  435.24  1287.95    11.56
## SE.mean          40.10    3.11       33.40    1.84    14.31     0.15
## CI.mean.0.95     78.80    6.10       65.63    3.62    28.12     0.29
## var          747772.93 4483.65   518632.81 1575.66 95248.47     9.85
## std.dev         864.74   66.96      720.16   39.69   308.62     3.14
## coef.var          0.40    0.50        0.13    0.09     0.24     0.27
##              Hitrost Poraba_90 Poraba_120 Motor Pogon         Cena
## median        190.00      6.30       9.00  2.00  1.00     26991.00
## mean          192.66      6.72      10.29  1.78  1.51     34263.33
## SE.mean         1.43      0.08       0.19  0.02  0.03      1307.38
## CI.mean.0.95    2.81      0.16       0.37  0.04  0.07      2569.12
## var           949.74      3.27      16.05  0.17  0.57 794796140.98
## std.dev        30.82      1.81       4.01  0.41  0.75     28192.13
## coef.var        0.16      0.27       0.39  0.23  0.50         0.82
# Column-wise summary of the whole data frame.
summary(object = podatki)
##        ID           Ccm             Km         Vrtljajimax  
##  Min.   :  1   Min.   : 796   Min.   : 39.0   Min.   :3400  
##  1st Qu.:117   1st Qu.:1598   1st Qu.: 90.0   1st Qu.:5000  
##  Median :233   Median :1984   Median :118.0   Median :5500  
##  Mean   :233   Mean   :2155   Mean   :133.2   Mean   :5428  
##  3rd Qu.:349   3rd Qu.:2461   3rd Qu.:150.0   3rd Qu.:6000  
##  Max.   :465   Max.   :5987   Max.   :485.0   Max.   :8250  
##     Dolžina           Teža         Pospešek        Hitrost     
##  Min.   :330.0   Min.   : 620   Min.   : 4.40   Min.   :125.0  
##  1st Qu.:404.0   1st Qu.:1065   1st Qu.: 9.40   1st Qu.:170.0  
##  Median :443.0   Median :1250   Median :11.10   Median :190.0  
##  Mean   :435.2   Mean   :1288   Mean   :11.56   Mean   :192.7  
##  3rd Qu.:467.0   3rd Qu.:1460   3rd Qu.:13.30   3rd Qu.:210.0  
##  Max.   :511.0   Max.   :2200   Max.   :25.00   Max.   :320.0  
##    Poraba_90        Poraba_120        Motor           Pogon      
##  Min.   : 3.600   Min.   : 4.90   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 5.400   1st Qu.: 7.40   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 6.300   Median : 9.00   Median :2.000   Median :1.000  
##  Mean   : 6.723   Mean   :10.29   Mean   :1.781   Mean   :1.508  
##  3rd Qu.: 7.600   3rd Qu.:12.30   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :15.500   Max.   :35.60   Max.   :2.000   Max.   :3.000  
##       Cena              MotorF        PogonF   
##  Min.   :  8155   dizelski :102   spredaj:303  
##  1st Qu.: 17794   bencinski:363   zadaj  : 88  
##  Median : 26991                   4x4    : 74  
##  Mean   : 34263                                
##  3rd Qu.: 39314                                
##  Max.   :240320
# Welch two-sample t-test (unequal variances, two-sided) comparing
# Poraba_120 between the two Motor groups.
# `paired` is deliberately not passed: the formula interface of t.test()
# rejects that argument with an error in R >= 4.4.0, and a two-group formula
# is always analysed as independent samples, so the result is unchanged.
t.test(podatki$Poraba_120 ~ podatki$Motor,
       alternative = "two.sided",
       var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  podatki$Poraba_120 by podatki$Motor
## t = -3.3364, df = 192.95, p-value = 0.001018
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -2.1199546 -0.5447189
## sample estimates:
## mean in group 1 mean in group 2 
##        9.251961       10.584298
# Linear model for Pospešek with main effects of Teža, Km, PogonF and MotorF
# plus the Km-by-MotorF interaction; the fitted model is then summarised.
fit <- lm(
  formula = Pospešek ~ Teža + Km + PogonF + MotorF + Km:MotorF,
  data = podatki
)
summary(fit)
## 
## Call:
## lm(formula = Pospešek ~ Teža + Km + PogonF + MotorF + Km:MotorF, 
##     data = podatki)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2557 -1.1652 -0.3618  0.6809  8.2220 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        17.4965680  0.6049569  28.922  < 2e-16 ***
## Teža                0.0025855  0.0004173   6.195 1.30e-09 ***
## Km                 -0.0690078  0.0054289 -12.711  < 2e-16 ***
## PogonFzadaj         0.8896774  0.2627070   3.387 0.000769 ***
## PogonF4x4           0.9718422  0.2508565   3.874 0.000123 ***
## MotorFbencinski    -4.5085332  0.5578018  -8.083 5.69e-15 ***
## Km:MotorFbencinski  0.0281638  0.0050838   5.540 5.11e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.76 on 458 degrees of freedom
## Multiple R-squared:  0.6896, Adjusted R-squared:  0.6856 
## F-statistic: 169.6 on 6 and 458 DF,  p-value: < 2.2e-16
# Standardise the eight clustering variables (columns 2 to 9) with scale().
podatki_clu_std <- as.data.frame(scale(podatki[2:9]))

# Dissimilarity of each unit: square root of the sum of its squared
# standardised values, i.e. its Euclidean distance from the origin.
podatki_clu_std$Različnost <- with(
  podatki_clu_std,
  sqrt(Ccm^2 + Km^2 + Vrtljajimax^2 + Dolžina^2 +
         Teža^2 + Pospešek^2 + Hitrost^2 + Poraba_90^2)
)

# List the units with the largest dissimilarity first.
head(podatki_clu_std[order(-podatki_clu_std$Različnost), ])
##          Ccm       Km Vrtljajimax   Dolžina     Teža  Pospešek
## 107 3.837900 5.254518  2.18245503 0.4978333 1.302704 -2.280301
## 105 3.837900 4.612344  1.14102219 0.9512956 1.302704 -2.025343
## 209 4.431143 3.895499 -0.31698379 1.9086049 2.647385 -1.579165
## 208 4.431143 3.895499 -0.31698379 1.8078355 2.841796 -1.579165
## 210 4.431143 3.895499 -0.31698379 0.3718716 1.999346 -1.451686
## 215 4.405702 3.701354  0.09958935 0.3718716 1.837336 -1.897863
##      Hitrost Poraba_90 Različnost
## 107 4.131863  4.851123   9.739244
## 105 3.807377  2.695662   8.109985
## 209 1.860458  3.193076   7.855740
## 208 1.860458  1.811371   7.449204
## 210 1.860458  2.308785   7.067198
## 215 1.860458  2.308785   7.001994
# Show the original records of the four units with the largest dissimilarity.
print(x = podatki[c(107, 105, 209, 208), ])
##      ID  Ccm  Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 107 107 5474 485        7000     455 1690      4.4     320      15.5
## 105 105 5474 442        6250     473 1690      5.2     310      11.6
## 209 209 5987 394        5200     511 2105      6.6     250      12.5
## 208 208 5987 394        5200     507 2165      6.6     250      10.0
##     Poraba_120 Motor Pogon   Cena    MotorF PogonF
## 107       35.6     2     2 213644 bencinski  zadaj
## 105       12.7     2     2 240320 bencinski  zadaj
## 209       24.5     2     2 154342 bencinski  zadaj
## 208       20.6     2     2 169724 bencinski  zadaj
# Remove the four units printed above (row names 107, 105, 209 and 208).
# Rows are matched by row name instead of by negative position, so running
# this statement again cannot drop further, unrelated rows.
podatki <- podatki[!(rownames(podatki) %in% c("107", "105", "209", "208")), ]

# Standardise the clustering variables (columns 2 to 9) again on the
# reduced data.
podatki_clu_std <- as.data.frame(scale(podatki[2:9]))

head(podatki_clu_std, 4)
##          Ccm        Km Vrtljajimax    Dolžina       Teža   Pospešek
## 1 -0.1612971 0.2518182    0.523296  0.5113345 0.32094457 -0.5499112
## 2 -0.1937569 0.3171924    1.080110 -0.1466210 0.28801509 -1.0334610
## 3 -0.1937569 0.3171924    1.080110  0.2329687 0.05750873 -0.8400410
## 4 -0.1937569 0.3171924    1.080110 -0.1466210 0.28801509 -1.0334610
##     Hitrost  Poraba_90
## 1 0.6113771 -0.1003219
## 2 0.6113771 -0.2739703
## 3 0.6113771 -0.2739703
## 4 0.7801263 -0.2739703
library(factoextra) 
# Hopkins statistic of clustering tendency, sampling all but one unit;
# the diagnostic plot is suppressed.
get_clust_tendency(
  data = podatki_clu_std,
  n = nrow(podatki_clu_std) - 1,
  graph = FALSE
)
## $hopkins_stat
## [1] 0.8540707
## 
## $plot
## NULL
library(dplyr) 
# Hierarchical clustering of the standardised variables.
WARD <- podatki_clu_std %>%
  get_dist(method = "euclidean") %>%  # pairwise Euclidean distances
  hclust(method = "ward.D2")      # agglomerate using the "ward.D2" method

# Print a short description of the fitted tree.
WARD
## 
## Call:
## hclust(d = ., method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 461
library(factoextra)
# Draw the dendrogram of the hierarchical clustering.
fviz_dend(x = WARD)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use
## `guides(<scale> = "none")` instead.

# Fix the random seed before running NbClust.
set.seed(1)
library(NbClust)

# Evaluate all NbClust indices for 2 to 6 clusters, using Euclidean
# distances and the "ward.D2" method.
Indeksi <- NbClust(
  data = podatki_clu_std,
  distance = "euclidean",
  method = "ward.D2",
  index = "all",
  min.nc = 2,
  max.nc = 6
)

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
## 

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 2 proposed 2 as the best number of clusters 
## * 4 proposed 3 as the best number of clusters 
## * 10 proposed 4 as the best number of clusters 
## * 3 proposed 5 as the best number of clusters 
## * 1 proposed 6 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************
library(factoextra)
# Plot how many indices voted for each number of clusters.
fviz_nbclust(
  x = Indeksi,
  ggtheme = theme_linedraw()
)
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "matrix") .viz_NbClust(x,
## print.summary, : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "numeric") print(best_nc) else if
## (class(best_nc) == : the condition has length > 1 and only the first
## element will be used
## Warning in if (class(best_nc) == "matrix") {: the condition has length
## > 1 and only the first element will be used
## Among all indices: 
## ===================
## * 2 proposed  0 as the best number of clusters
## * 1 proposed  1 as the best number of clusters
## * 2 proposed  2 as the best number of clusters
## * 4 proposed  3 as the best number of clusters
## * 10 proposed  4 as the best number of clusters
## * 3 proposed  5 as the best number of clusters
## * 1 proposed  6 as the best number of clusters
## * 3 proposed  NA's as the best number of clusters
## 
## Conclusion
## =========================
## * According to the majority rule, the best number of clusters is  4 .

# Cut the hierarchical tree into four groups and store each unit's group
# in the data frame.
podatki$RazvrstitevWARD <- cutree(tree = WARD, k = 4)

head(podatki)
##   ID  Ccm  Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1  1 1995 146        5800     455 1380      9.9     210       6.5
## 2  2 1969 150        6200     429 1370      8.4     210       6.2
## 3  3 1969 150        6200     444 1300      9.0     210       6.2
## 4  4 1969 150        6200     429 1370      8.4     215       6.2
## 5  5 1969 150        6200     410 1240      8.4     210       6.2
## 6  6 1747 140        6300     445 1270     10.0     205       6.1
##   Poraba_120 Motor Pogon  Cena    MotorF  PogonF RazvrstitevWARD
## 1        8.3     2     1 33263 bencinski spredaj               1
## 2        7.8     2     1 37387 bencinski spredaj               1
## 3        8.0     2     1 25453 bencinski spredaj               1
## 4        7.8     2     1 35994 bencinski spredaj               1
## 5        8.0     2     1 19061 bencinski spredaj               1
## 6        8.2     2     1 20215 bencinski spredaj               1
library(factoextra) 
# Hierarchical k-means with four clusters; the hierarchical step uses
# Euclidean distances and the "ward.D2" method.
MetodaVod <- hkmeans(
  x = podatki_clu_std,
  k = 4,
  hc.metric = "euclidean",
  hc.method = "ward.D2"
)

# Print cluster sizes, centres, memberships and sums of squares.
print(MetodaVod)
## Hierarchical K-means clustering with 4 clusters of sizes 194, 144, 58, 65
## 
## Cluster means:
##          Ccm          Km Vrtljajimax    Dolžina        Teža
## 1 -0.1618055  0.07397687   0.5032048  0.2908855 -0.05311056
## 2 -0.7547205 -0.87724839  -0.2756734 -1.0166897 -0.97425877
## 3  1.7599239  2.01128548   0.4776949  0.9904028  1.17569166
## 4  0.5845297 -0.07203541  -1.3174010  0.5004334  1.26779376
##     Pospešek    Hitrost   Poraba_90
## 1 -0.4696519  0.3949607 -0.06332458
## 2  0.7697768 -0.8757249 -0.79129802
## 3 -1.2302157  1.6261996  1.36471230
## 4  0.7941093 -0.6898087  0.72428571
## 
## Clustering vector:
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
##   1   1   1   1   1   1   1   1   1   1   2   4   2   1   4   1   1 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34 
##   3   1   1   3   1   1   1   1   1   1   1   1   3   3   1   3   3 
##  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51 
##   1   1   2   1   1   3   3   3   3   1   1   1   2   1   1   1   3 
##  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68 
##   3   3   3   3   1   1   1   1   1   3   4   4   4   4   4   4   4 
##  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85 
##   1   1   1   1   4   2   2   4   4   1   2   2   2   2   2   2   2 
##  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 
##   1   1   1   1   1   1   2   3   2   2   1   2   1   2   2   2   2 
## 103 104 106 108 109 110 111 112 113 114 115 116 117 118 119 120 121 
##   2   2   3   3   2   2   2   2   2   1   1   2   2   1   2   1   1 
## 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 
##   2   2   2   1   1   1   1   4   2   2   2   2   2   1   2   1   3 
## 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 
##   3   1   1   1   1   2   1   1   1   1   2   2   1   1   1   1   1 
## 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 
##   4   4   3   4   4   4   4   4   4   2   2   2   1   1   1   2   2 
## 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 
##   2   2   1   2   1   3   1   4   4   2   3   3   4   4   2   2   1 
## 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 
##   2   1   1   1   1   1   1   1   4   4   4   4   1   2   4   4   4 
## 207 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 
##   4   3   1   1   1   1   3   1   3   3   3   1   1   1   1   3   3 
## 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 
##   3   3   3   3   3   3   3   3   3   3   4   4   2   4   4   2   2 
## 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 
##   1   2   3   1   1   3   1   1   1   4   4   1   2   2   2   1   2 
## 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 
##   3   1   1   4   1   2   4   2   2   2   4   4   2   1   2   2   1 
## 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 
##   1   1   1   1   1   1   1   1   2   2   2   1   1   3   4   4   4 
## 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 
##   1   1   1   1   1   2   2   1   1   1   1   1   2   2   1   2   3 
## 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 
##   2   1   1   3   2   2   2   1   1   4   4   4   3   2   2   4   2 
## 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 
##   2   2   1   4   1   1   1   1   1   3   2   2   2   1   1   1   1 
## 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 
##   1   1   2   1   1   2   1   1   1   1   1   1   1   1   1   1   1 
## 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   3   4   2   2   2   2   2   2   2   2   2   2   2   2   2   4   1 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 
##   1   4   4   4   4   3   1   1   1   1   2   2   2   2   2   2   2 
## 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 
##   2   2   2   1   2   2   2   2   2   1   1   4   4   2   2   4   1 
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 
##   1   2   1   2   2   3   1   1   2   1   3   1   1   3   3   4   4 
## 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 
##   2   2   1   2   2   2   2   2   2   2   2   2   2   2   4   1   2 
## 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 
##   1   1   1   1   1   4   1   2   1   4   2   3   1   1   3   1   1 
## 464 465 
##   1   2 
## 
## Within cluster sum of squares by cluster:
## [1] 421.4469 419.0552 300.9513 341.3704
##  (between_SS / total_SS =  59.7 %)
## 
## Available components:
## 
##  [1] "cluster"      "centers"      "totss"        "withinss"    
##  [5] "tot.withinss" "betweenss"    "size"         "iter"        
##  [9] "ifault"       "data"         "hclust"
library(factoextra)
# Plot the clusters of the hierarchical k-means solution.
fviz_cluster(
  object = MetodaVod,
  palette = "Dark2",
  repel = FALSE,
  ggtheme = theme_linedraw()
)

# Store each unit's cluster from the hierarchical k-means solution in the
# data frame and preview the result.
podatki$RazvrstitevVod <- MetodaVod[["cluster"]]
head(x = podatki)
##   ID  Ccm  Km Vrtljajimax Dolžina Teža Pospešek Hitrost Poraba_90
## 1  1 1995 146        5800     455 1380      9.9     210       6.5
## 2  2 1969 150        6200     429 1370      8.4     210       6.2
## 3  3 1969 150        6200     444 1300      9.0     210       6.2
## 4  4 1969 150        6200     429 1370      8.4     215       6.2
## 5  5 1969 150        6200     410 1240      8.4     210       6.2
## 6  6 1747 140        6300     445 1270     10.0     205       6.1
##   Poraba_120 Motor Pogon  Cena    MotorF  PogonF RazvrstitevWARD
## 1        8.3     2     1 33263 bencinski spredaj               1
## 2        7.8     2     1 37387 bencinski spredaj               1
## 3        8.0     2     1 25453 bencinski spredaj               1
## 4        7.8     2     1 35994 bencinski spredaj               1
## 5        8.0     2     1 19061 bencinski spredaj               1
## 6        8.2     2     1 20215 bencinski spredaj               1
##   RazvrstitevVod
## 1              1
## 2              1
## 3              1
## 4              1
## 5              1
## 6              1
# Group sizes of the hierarchical (Ward) classification.
table(podatki$RazvrstitevWARD)
## 
##   1   2   3   4 
## 191 159  56  55
# Group sizes of the hierarchical k-means classification.
table(podatki$RazvrstitevVod)
## 
##   1   2   3   4 
## 194 144  58  65
# Cross-tabulation of the two classifications (rows: Ward, columns: k-means).
table(podatki$RazvrstitevWARD, podatki$RazvrstitevVod)
##    
##       1   2   3   4
##   1 175   6   2   8
##   2  15 136   0   8
##   3   1   0  54   1
##   4   3   2   2  48
# Cluster centres of the hierarchical k-means solution, as a data frame
# with one row per cluster.
Povprečja <- MetodaVod$centers
Slika <- as.data.frame(Povprečja)
# Row number as cluster id; seq_len() stays empty for a zero-row input,
# whereas 1:nrow() would produce c(1, 0).
Slika$id <- seq_len(nrow(Slika))

library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
## 
##     extract
## The following object is masked from 'package:magrittr':
## 
##     extract
# Reshape to long format: one row per cluster and variable, with the
# variable name in `name` and the centre in `value`.
Slika <- pivot_longer(
  Slika,
  cols = c("Ccm", "Km", "Vrtljajimax", "Dolžina",
           "Teža", "Pospešek", "Hitrost", "Poraba_90")
)

# Cluster id as a factor, used for the plot legend.
Slika$Skupina <- factor(
  Slika$id,
  levels = c(1, 2, 3, 4),
  labels = c("1", "2", "3", "4")
)

# Variable name as a factor whose level order fixes the order on the x axis;
# the labels default to the levels themselves.
Slika$ImeF <- factor(
  Slika$name,
  levels = c("Ccm", "Km", "Vrtljajimax", "Dolžina",
             "Teža", "Pospešek", "Hitrost", "Poraba_90")
)

library(ggplot2)
# Profile plot of the cluster centres: one line per cluster across the
# clustering variables, with a horizontal reference line at zero.
ggplot(data = Slika, mapping = aes(x = ImeF, y = value)) +
  geom_hline(yintercept = 0) +
  theme_linedraw() +
  geom_point(mapping = aes(shape = Skupina, col = Skupina), size = 3) +
  geom_line(mapping = aes(group = id, linetype = Skupina), size = 1) +
  labs(x = "Razvrstitvene spremenljivke", y = "Povprečje") +
  ylim(-2, 3.5)

# One-way analysis of variance of each clustering variable across the
# k-means clusters, fitted as a single multi-response model.
fit <- aov(
  formula = cbind(Ccm, Km, Vrtljajimax, Dolžina,
                  Teža, Pospešek, Hitrost, Poraba_90) ~
    as.factor(RazvrstitevVod),
  data = podatki
)

summary(fit)
##  Response Ccm :
##                            Df    Sum Sq  Mean Sq F value    Pr(>F)
## as.factor(RazvrstitevVod)   3 185390013 61796671  257.35 < 2.2e-16
## Residuals                 457 109739282   240130                  
##                              
## as.factor(RazvrstitevVod) ***
## Residuals                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Km :
##                            Df  Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 1298491  432830  466.92 < 2.2e-16 ***
## Residuals                 457  423636     927                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Vrtljajimax :
##                            Df    Sum Sq  Mean Sq F value    Pr(>F)
## as.factor(RazvrstitevVod)   3  96045389 32015130  103.51 < 2.2e-16
## Residuals                 457 141342344   309283                  
##                              
## as.factor(RazvrstitevVod) ***
## Residuals                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Dolžina :
##                            Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 372322  124107  163.93 < 2.2e-16 ***
## Residuals                 457 345987     757                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Teža :
##                            Df   Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 29683598 9894533  354.98 < 2.2e-16 ***
## Residuals                 457 12738105   27873                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Pospešek :
##                            Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 2472.0  823.99  192.66 < 2.2e-16 ***
## Residuals                 457 1954.5    4.28                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Hitrost :
##                            Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 285332   95111  366.76 < 2.2e-16 ***
## Residuals                 457 118513     259                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Poraba_90 :
##                            Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(RazvrstitevVod)   3 695.63 231.875  156.45 < 2.2e-16 ***
## Residuals                 457 677.34   1.482                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Mean price (Cena) within each k-means cluster.
aggregate(
  x = podatki$Cena,
  by = list(podatki$RazvrstitevVod),
  FUN = mean
)
##   Group.1        x
## 1       1 30379.46
## 2       2 15801.22
## 3       3 78875.48
## 4       4 37086.88
# One-way analysis of variance of price (Cena) across the k-means clusters.
fit <- aov(
  formula = Cena ~ as.factor(RazvrstitevVod),
  data = podatki
)

summary(fit)
##                            Df    Sum Sq   Mean Sq F value Pr(>F)    
## as.factor(RazvrstitevVod)   3 1.671e+11 5.569e+10   272.4 <2e-16 ***
## Residuals                 457 9.342e+10 2.044e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Chi-squared test of association between engine type (MotorF) and the
# k-means cluster.
hi_kvadrat <- chisq.test(podatki$MotorF, as.factor(podatki$RazvrstitevVod))
hi_kvadrat
## 
##  Pearson's Chi-squared test
## 
## data:  podatki$MotorF and as.factor(podatki$RazvrstitevVod)
## X-squared = 155.29, df = 3, p-value < 2.2e-16
# Observed counts with row and column totals.
addmargins(hi_kvadrat$observed)
##               
## podatki$MotorF   1   2   3   4 Sum
##      dizelski    9  44   1  48 102
##      bencinski 185 100  57  17 359
##      Sum       194 144  58  65 461
# Expected counts with totals. The margins are added before rounding so the
# totals are the sums of the exact values, not of the rounded cells.
round(addmargins(hi_kvadrat$expected), 2)
##               
## podatki$MotorF      1      2     3     4 Sum
##      dizelski   42.92  31.86 12.83 14.38 102
##      bencinski 151.08 112.14 45.17 50.62 359
##      Sum       194.00 144.00 58.00 65.00 461
# Pearson residuals, (observed - expected) / sqrt(expected); the component is
# named in full rather than relying on partial matching of `$res`.
round(hi_kvadrat$residuals, 2)
##               
## podatki$MotorF     1     2     3     4
##      dizelski  -5.18  2.15 -3.30  8.86
##      bencinski  2.76 -1.15  1.76 -4.73
# Chi-squared test of association between drive type (PogonF) and the
# k-means cluster.
hi_kvadrat <- chisq.test(podatki$PogonF, as.factor(podatki$RazvrstitevVod))
hi_kvadrat
## 
##  Pearson's Chi-squared test
## 
## data:  podatki$PogonF and as.factor(podatki$RazvrstitevVod)
## X-squared = 189.9, df = 6, p-value < 2.2e-16
# Observed counts with row and column totals.
addmargins(hi_kvadrat$observed)
##               
## podatki$PogonF   1   2   3   4 Sum
##        spredaj 145 127  10  21 303
##        zadaj    30   4  38  12  84
##        4x4      19  13  10  32  74
##        Sum     194 144  58  65 461
# Expected counts with totals. The margins are added before rounding so the
# totals are the sums of the exact values, not of the rounded cells.
round(addmargins(hi_kvadrat$expected), 2)
##               
## podatki$PogonF      1      2     3     4 Sum
##        spredaj 127.51  94.65 38.12 42.72 303
##        zadaj    35.35  26.24 10.57 11.84  84
##        4x4      31.14  23.11  9.31 10.43  74
##        Sum     194.00 144.00 58.00 65.00 461
# Pearson residuals, (observed - expected) / sqrt(expected); the component is
# named in full rather than relying on partial matching of `$res`.
round(hi_kvadrat$residuals, 2)
##               
## podatki$PogonF     1     2     3     4
##        spredaj  1.55  3.33 -4.55 -3.32
##        zadaj   -0.90 -4.34  8.44  0.05
##        4x4     -2.18 -2.10  0.23  6.68
library(DescTools)
## 
## Attaching package: 'DescTools'
## The following objects are masked from 'package:Hmisc':
## 
##     %nin%, Label, Mean, Quantile
# Cramer's V between engine type and the k-means cluster.
CramerV(
  x = podatki$MotorF,
  y = as.factor(podatki$RazvrstitevVod)
)
## [1] 0.580393
# Cramer's V between drive type and the k-means cluster.
CramerV(
  x = podatki$PogonF,
  y = as.factor(podatki$RazvrstitevVod)
)
## [1] 0.4538315

```